library(ggplot2)
library(dplyr)
library(tidyr)
library(reshape2)
library(lme4)
library(lmerTest)
library(glmmTMB)
library(DHARMa)
library(patchwork)

#####Loading dataframes#####
setwd("/Users/louis/Library/CloudStorage/GoogleDrive-louis.bubrig@gmail.com/My Drive/Work/Gibson Lab/Projects/Epidemic Timeline/")

host_dimensions <- read.csv(file = "./Data/dimensiondata.csv", header = TRUE)
parasite_particles <- read.csv(file = "./Data/particledata.csv", header = TRUE) %>%
  group_by(Strain, SDS, Block, Timepoint, Plate, WormID, ImageName) %>%
  summarize(TotalParticleArea = sum(Area))

CombinedDF <- merge(host_dimensions, parasite_particles, all.x = TRUE) %>%
  mutate_all(~replace(., is.na(.), 0)) %>%
  mutate(LoadPercent = TotalParticleArea / Area) %>%
  mutate(IsInfected = case_when(LoadPercent > 0 ~ TRUE,
                                LoadPercent == 0 ~ FALSE)) %>%
  mutate(SDS = as.factor(SDS),
         Strain = as.factor(Strain)) %>%
  mutate(SDS = relevel(SDS, ref = "Pre"),
         Strain = relevel(Strain, ref = "N2"))

#####Spore Acquisition Visualization####
data <- read.csv("./Data/sporeacquisition.csv")

data <- dplyr::mutate(data, Block = as.factor(Block),
                      Age = as.factor(Age), Rep = as.factor(Rep))
data$Age <- factor(data$Age, levels = c("L1", "L4", "A"))
levels <- dplyr::select(data, Age, Dose, Block, Rep)
levels <- dplyr::distinct(levels)
data <- dplyr::mutate(data, NewPlate = paste(Age, Dose, Block, Rep,
                                             sep = ":"))
plate_levels <- unique(data$NewPlate)
plate_levels <- data.frame(cbind(NewPlate = plate_levels,
                                 PlateId = seq_along(plate_levels)))
data <- dplyr::left_join(data, plate_levels)
data <- dplyr::mutate(data, ExposurePop = as.factor(PlateId))

#Test for Age*Dose interaction first
mod1 <- glmmTMB(data = data, family = nbinom1(), formula =
                  SporeCount ~ Age * Dose + Block + (1 | ExposurePop))
testDispersion(mod1)
testZeroInflation(mod1)
plot(simulateResiduals(mod1))

mod2 <- glmmTMB(data = data, family = nbinom1(), formula =
                  SporeCount ~ Age + Dose + Block + (1 | ExposurePop))
anova(mod1, mod2) #interaction not supported

#Try dropping Age
mod3 <- glmmTMB(data = data, family = nbinom1(), formula =
                  SporeCount ~  Dose + Block + (1 | ExposurePop))
anova(mod2, mod3) #Age is clearly important here
summary(mod2)

# Bootstrapping for CIs ####
## generate covariate frame and predictions ####
cov_df <- unique(dplyr::select(data, Dose, Age, Block))
cov_df <- dplyr::mutate(cov_df, ExposurePop = NA)
preds_df <- predict(mod2, newdata = cov_df, type = "link")
preds_df <- cbind(cov_df, pred_link = preds_df)

# bootstrap model predictions by treatment group for CI ####
## set up computing cluster - CHANGE ME ##############################
sims <- 1000 #number of bootstrap sims per group
total_cores <- parallel::detectCores(logical = FALSE)
use_cores <- floor((total_cores - 1))
cluster <- snow::makeCluster(use_cores, type = "SOCK",
                             outfile = "snow_out.txt")
snow::clusterEvalQ(cl = cluster, library(Matrix))
snow::clusterEvalQ(cl = cluster, library(lme4))
snow::clusterEvalQ(cl = cluster, library(glmmTMB))
snow::clusterEvalQ(cl = cluster, set.seed(1234))
snow::clusterExport(cl = cluster, "cov_df")
snow::clusterExport(cl = cluster, "mod2")
alarm_logical <- TRUE #if true, fanfare will sound when cl completes

## bootstrap with cluster ####
# with n = 10 on 6 cores, will take about 12 seconds
# with n = 1000, will take about 20 minutes
# with n = 1000 on 3 cores, will take 17-20 minutes
boot_time_0 <- Sys.time()
boot_list <- lme4::bootMer(mod2, nsim = sims, parallel = "snow",
                           cl = cluster, FUN = function(x) {
                             predict(x, newdata = cov_df)
                           })
boot_list <- list(as.list(boot_list), purrr::transpose(cov_df))
boot_time_1 <- Sys.time()
snow::stopCluster(cluster)
boot_time_1 - boot_time_0
saveRDS(boot_list, "new_boot_results_n3.RDS")
if (alarm_logical) {
  beepr::beep(3)
}

## wrangle boot results to dataframe ####
result_matrix <- purrr::pluck(boot_list, 1, 2)
boot_df <- NULL
for (i in seq_len(ncol(result_matrix))) {
  inner_df <- as.data.frame(result_matrix[, i])
  inner_df <- dplyr::mutate(inner_df, cov_df[i, ])
  names(inner_df)[1] <- "boot_values"
  inner_df <- dplyr::mutate(inner_df, Group = i)
  boot_df <- rbind(boot_df, inner_df)
}
write.csv(boot_df, "new_boot_df.csv", row.names = FALSE)
######################################################################
boot_df <- read.csv("./Data/bootstrapvalues.csv")
## get confidence intervals
CI <- .95
lwr_prob <- (1 - CI) / 2
upr_prob <- 1 - lwr_prob
conf_df <- NULL
for (i in unique(boot_df$Group)) {
  this_group_values <- dplyr::filter(boot_df, Group == i)
  inner_conf_df <- dplyr::select(this_group_values, -boot_values)
  inner_conf_df <- inner_conf_df[1, ]
  this_group_values <- this_group_values$boot_values
  lwr_value <- quantile(this_group_values, lwr_prob, na.rm = TRUE)
  upr_value <- quantile(this_group_values, upr_prob, na.rm = TRUE)
  inner_conf_df <- dplyr::mutate(inner_conf_df,
                                 lwr_CI_link = lwr_value,
                                 upr_CI_link = upr_value)
  conf_df <- rbind(conf_df, inner_conf_df)
}
conf_df <- dplyr::mutate(conf_df, Block = as.factor(Block),
                         Age = as.factor(Age),
                         Group = as.factor(Group))
conf_df$Age <- factor(conf_df$Age, levels = c("L1", "L4", "A"))
conf_df <- dplyr::full_join(preds_df, conf_df) #bind to preds

## transform response scale ####
conf_df <- dplyr::mutate(conf_df, pred_resp = exp(pred_link),
                         lwr_CI_resp = exp(lwr_CI_link),
                         upr_CI_resp = exp(upr_CI_link))

# Make figures ####
## Elements ####
### new variable for x-scale position ####
conf_df <- dplyr::mutate(conf_df, Plot_Dose =
                           case_when(Block == 2 ~ Dose - 2.5,
                                     Block == 3 ~ Dose + 2.5,
                                     .default = Dose))

### Theme elements ####
theme_set(theme_minimal())
theme_update(
  axis.title = element_text(size = 16, face = "bold"),
  axis.text = element_text(size = 14),
  panel.border = element_rect(linewidth = 1, fill = NA),
  strip.text = element_text(size = 18, face = "bold"),
  plot.title = element_text(size = 16, face = "bold"),
  legend.title = element_text(size = 12, face = "bold"),
  legend.text = element_text(size = 12)
)

color_scale <- scale_color_manual(
  values = c("lightgreen", "darkgrey", "grey35"),
  labels = c("L1", "L4", "Adult"),
  aesthetics = c("color", "fill")
)
fill_scale <- scale_fill_manual(
  values = c("lightgreen", "darkgrey", "grey35"),
  labels = c("L1", "L4", "Adult"),
  aesthetics = c("color", "fill")
)
set.seed(1234)

### Plot elements ####
#### Geoms ####
data_geom <- geom_jitter(data = data, aes(Dose, SporeCount,
                                          color = Age),
                         alpha = 0.6, height = 5, width = 5, size = 1,
                         show.legend = FALSE)
pred_geom <- geom_jitter(data = conf_df, aes(Plot_Dose, pred_resp,
                                             fill = Age),
                         shape = 21, size = 4, stroke = 1, position = "identity")
conf_geom <- geom_errorbar(data = conf_df, aes(Plot_Dose,
                                               ymin = lwr_CI_resp,
                                               ymax = upr_CI_resp,
                                               group = Age),
                           width = 2, linewidth = 0.75, position = "identity")
conf_geom <- geom_errorbar(aes(Plot_Dose, ymin = lwr_CI_resp,
                               ymax = upr_CI_resp, group = Age),
                           data = conf_df, width = 2, linewidth = 0.75,
                           position = "identity")
hist_geom <- geom_histogram(aes(SporeCount, fill = Age), binwidth = 5)
hist_patch_geom <- geom_histogram((aes(SporeCount, fill = Age)),
                                  binwidth = 5, show.legend = FALSE)

### other elements ####
age_facet <- facet_wrap(vars(Age))
hist_labs <- labs(x = "Spore Count", y = "Frequency",
                  fill = "Age Class")
plot_labs <- labs(x = "Dose (µL Spore Stock)", y = "Spore Count", fill = "Life Stage")
dose_scale <- scale_x_continuous(limits = c(10, 90),
                                 breaks = c(20, 40, 80))

## Plots ####
hist_plot <- ggplot(data) + hist_geom + color_scale + age_facet +
  hist_labs
hist_patch <- ggplot(data) + hist_patch_geom + color_scale +
  age_facet + hist_labs
full_plot <- ggplot(conf_df) + data_geom + pred_geom + conf_geom +
  plot_labs + color_scale + fill_scale + dose_scale
pred_plot <- ggplot(conf_df) + pred_geom + conf_geom + plot_labs +
  color_scale + dose_scale

full_patches <- full_plot + labs(title = "A.") + hist_patch +
  labs(title = "B.") + plot_layout(nrow = 2, heights = c(5, 2.5))
pred_patches <- pred_plot + hist_patch +
  plot_layout(nrow = 2, heights = c(5, 2.5))

## Save figs ####
#ggsave(hist_plot, filename = "./Figures/new_hist_plot.png", height = 5,
#       width = 10, dpi = 300)
#ggsave(full_plot, filename = "./Figures/new_full_plot.png", height = 7,
#       width = 7, dpi = 300)
#ggsave(pred_plot, filename = "./Figures/new_pred_plot.png", height = 7,
#       width = 7, dpi = 300)
ggsave(full_patches, filename = "./Figures/Manuscript/SporeAcquisition.png",
       height = 8, width = 8, dpi = 300)
#ggsave(pred_patches, filename = "./Figures/new_pred_patches.png",
#       height = 8, width = 8, dpi = 300)


####Spore Acquisition Statistical Analysis####
data <- read.csv("./Data/sporeacquisition.csv")
data <- dplyr::mutate(
  data,
  Block = as.factor(Block),
  Age = as.factor(Age),
  Rep = as.factor(Rep)
)
data$Age <- factor(data$Age, levels = c("L1", "L4", "A"))

levels <- dplyr::select(data, Age, Dose, Block, Rep)
levels <- dplyr::distinct(levels)
data <- dplyr::mutate(
  data,
  NewPlate = paste(Age, Dose, Block, Rep, sep = ":")
)
plate_levels <- unique(data$NewPlate)
plate_levels <- data.frame(
  cbind(NewPlate = plate_levels, PlateId = seq_along(plate_levels))
)
data <- dplyr::left_join(data, plate_levels)
data <- dplyr::mutate(
  data,
  ExposurePop = as.factor(PlateId)
)

data <- dplyr::mutate(data, ScaledDose = Dose/10)


#Test for Age*Dose interaction first
mod1 <- glmmTMB(data = data,
                formula = SporeCount ~ Age*Dose + Block + (1|ExposurePop),
                family = nbinom1())
testDispersion(mod1)
testZeroInflation(mod1)
plot(simulateResiduals(mod1))

mod2 <- glmmTMB(data = data,
                formula = SporeCount ~ Age + Dose + Block + (1|ExposurePop),
                family = nbinom1())
anova(mod1, mod2) #interaction not supported

#Try dropping Age
mod2 <- glmmTMB(data = data,
                formula = SporeCount ~ Age + Dose + Block + (1|ExposurePop),
                family = nbinom1())
mod3 <- glmmTMB(data = data,
                formula = SporeCount ~  Dose + Block + (1|ExposurePop),
                family = nbinom1())
anova(mod2, mod3) #Age is clearly important here
summary(mod2)
  

####Infection prevalence visualization####
PlateDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  group_by(Strain, Block, Timepoint, Plate) %>%
  summarize(PlatePrevalence = mean(IsInfected)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

MeanDF <- PlateDF %>%
  group_by(Strain, DaysPostExposure) %>%
  summarize(MoMPrevalence = mean(PlatePrevalence),
            SD = sd(PlatePrevalence),
            N = n()) %>%
  mutate(SE = SD / sqrt(N))

tiff("./Figures/Manuscript/PopulationPrevalence.tiff", units="in", width = 5, height = 5, res=500)
ggplot(data = MeanDF, aes(x = DaysPostExposure, y = MoMPrevalence)) +
  facet_grid(Strain~.) +
  geom_ribbon(aes(ymin = 0, ymax = MoMPrevalence),
              fill = "darkred",
              alpha = 0.2) +
  geom_point(data = PlateDF, 
             aes(x = DaysPostExposure,
                 y = PlatePrevalence,
                 shape = Strain),
             size = 3,
             alpha = 0.3,
             color = "grey2",
             position = position_jitter(width = 0.2)) +
  geom_line(color = "darkgrey",
            linewidth = 1) +
  geom_errorbar(aes(x = DaysPostExposure + 0.4,
                    ymin = MoMPrevalence - SE,
                    ymax = MoMPrevalence + SE),
                linewidth = 0.75,
                width = 0.2) +
  geom_point(aes(shape = Strain),
             size = 4) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylab("Infection Prevalence") +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  theme(axis.title.x = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.title.y = element_text(size = 20, face = "bold"),
        panel.border = element_rect(linewidth = 2, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        legend.position = "none")
dev.off()


#####Prevalence by life stage Visualization####
PlottingDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  mutate(LengthBin = cut(Length, breaks = c(0, 370, 480, 640, 850, 2000))) %>%
  mutate(LifeStage = case_when(LengthBin == "(0,370]" ~ "L1",
                              LengthBin == "(370,480]" ~ "L2",
                              LengthBin == "(480,640]" ~ "L3",
                              LengthBin == "(640,850]" ~ "L4",
                              LengthBin == "(850,2e+03]" ~ "Adult")) %>%
  group_by(Block, Strain, Timepoint, Plate, LifeStage) %>%
  summarize(Prevalence = mean(IsInfected)) %>%
  group_by(Strain, Timepoint, LifeStage) %>%
  summarize(meanPrevalence = mean(Prevalence),
            SD = sd(Prevalence),
            N = n()) %>%
  mutate(SE = SD/sqrt(N)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

PlottingDF$LifeStage <- as.factor(PlottingDF$LifeStage)
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "Adult")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L4")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L3")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L2")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L1")

tiff("./Figures/Manuscript/PrevalenceByLifeStage.tiff", units="in", width = 10, height = 5, res=500)
ggplot(data = PlottingDF, aes(x = LifeStage, y = meanPrevalence)) +
  facet_grid(Strain~DaysPostExposure, switch = "x") +
  geom_point(aes(fill = LifeStage),
             shape = 21,
             size = 5,
             stroke = 1) +
  geom_errorbar(aes(ymin = meanPrevalence - SE, 
                    ymax = meanPrevalence + SE),
                width = 0.4,
                linewidth = 0.75) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  ylab("Infection Prevalence") +
  scale_fill_manual(name = "Host Life Stage", values = c("lightgreen", "darkgreen", "lightgrey", "darkgrey", "grey35")) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.title.x = element_text(size = 16, face = "bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16, face = "bold"),
        panel.border = element_rect(size = 1, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        strip.text.x = element_text(size = 12),
        legend.title = element_text(size = 12, face = "bold"),
        legend.text = element_text(size = 12))
dev.off()


#####Prevalence statistical analysis####
DataDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  mutate(UniquePlateID = paste(Strain, Block, DaysPostExposure, Plate, sep="_")) %>%
  mutate(UniqueWormID = paste(UniquePlateID, WormID, sep="_")) %>%
  mutate(RescaledLength = Length / 100) %>% #Worms are now measured in units of 100 µm
  select(Strain, DaysPostExposure, Block, Plate, UniquePlateID, UniqueWormID, RescaledLength, IsInfected)
DataDF$UniquePlateID <- as.factor(DataDF$UniquePlateID)
DataDF$UniqueWormID <- as.factor(DataDF$UniqueWormID)
DataDF$Block <- as.factor(DataDF$Block)


#Looks like there might be a DaysPostExposure*RescaledLength interaction, so we'll test that
mod1 <- glmmTMB(data = DataDF,
                formula = IsInfected~DaysPostExposure*RescaledLength + Strain + Block + (1|UniquePlateID),
                family = binomial)
testDispersion(mod1)
testZeroInflation(mod1)
plot(simulateResiduals(mod1))

mod2 <- glmmTMB(data = DataDF,
                formula = IsInfected~DaysPostExposure + RescaledLength + Strain + Block + (1|UniquePlateID),
                family = binomial)
anova(mod1, mod2) #mod1 is better, so the interaction is retained
summary(mod1)


####Load by life stage visualization####
PlottingDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  mutate(LengthBin = cut(Length, breaks = c(0, 370, 480, 640, 850, 2000))) %>%
  mutate(LifeStage = case_when(LengthBin == "(0,370]" ~ "L1",
                               LengthBin == "(370,480]" ~ "L2",
                               LengthBin == "(480,640]" ~ "L3",
                               LengthBin == "(640,850]" ~ "L4",
                               LengthBin == "(850,2e+03]" ~ "Adult")) %>%
  group_by(Block, Strain, Timepoint, Plate, LifeStage) %>%
  summarize(PlateLoad = mean(LoadPercent)) %>%
  group_by(Strain, Timepoint, LifeStage) %>%
  summarize(meanLoad = mean(PlateLoad),
            SD = sd(PlateLoad),
            N = n()) %>%
  mutate(SE = SD/sqrt(N)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

PlottingDF$LifeStage <- as.factor(PlottingDF$LifeStage)
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "Adult")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L4")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L3")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L2")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L1")

tiff("./Figures/Manuscript/LoadByLifeStage.tiff", units="in", width = 10, height = 5, res=500)
ggplot(data = PlottingDF, aes(x = LifeStage, y = meanLoad)) +
  facet_grid(Strain~DaysPostExposure, switch = "x") +
  geom_point(aes(fill = LifeStage),
             shape = 21,
             size = 5,
             stroke = 1) +
  geom_errorbar(aes(ymin = meanLoad - SE, 
                    ymax = meanLoad + SE),
                width = 0.4,
                linewidth = 0.75) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  ylab("Infection Load") +
  scale_fill_manual(name = "Host Life Stage", values = c("lightgreen", "darkgreen", "lightgrey", "darkgrey", "grey35")) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.title.x = element_text(size = 16, face = "bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16, face = "bold"),
        panel.border = element_rect(size = 1, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        strip.text.x = element_text(size = 12),
        legend.title = element_text(size = 12, face = "bold"),
        legend.text = element_text(size = 12))
dev.off()



#####Load statistical analysis####
DataDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  mutate(UniquePlateID = paste(Strain, Block, DaysPostExposure, Plate, sep="_")) %>%
  mutate(RescaledLength = Length / 100) %>% #Worms are now measured in units of 100 µm
  filter(LoadPercent > 0) %>%
  select(Strain, Block, DaysPostExposure, UniquePlateID, RescaledLength, LoadPercent)
DataDF$UniquePlateID <- as.factor(DataDF$UniquePlateID)
DataDF$Block <- as.factor(DataDF$Block)

#Suspected interaction between DaysPostExposure and RescaledLength, so we'll test that
mod1 <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure*RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())
testDispersion(mod1)
testZeroInflation(mod1)
plot(simulateResiduals(mod1))

mod2 <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure + RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())
anova(mod1, mod2) #There is no statistical difference between these models, so we'll go with the simpler model

#Testing whether RescaledLength is supported
mod2 <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure + RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())
mod3 <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())
anova(mod2, mod3) #This are definitely different and RescaledLength is supported
summary(mod2)



##Checking for random effect structure first
#Saturated Model
mod1 <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure*RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())
mod1x <- glmmTMB(data = DataDF,
                 formula = LoadPercent~DaysPostExposure*RescaledLength + Strain + Block,
                 ziformula = ~1,
                 family = beta_family())
AIC(mod1, mod1x) #Model 1 is better, so the random effect is retained

##Tier 1
#Saturated model
mod1 <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure*RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family(),
                link = logit)
#Nested model with interaction dropped
mod1a <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure + RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())

#Nested model with Strain dropped
mod1b <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure*RescaledLength + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())

AIC(mod1, mod1a, mod1b) #Best model is mod1; however mod1a is very close so I will test those below


##Tier 2
#Saturated model (same as mod1a above)
mod2 <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure + RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())

#Nested model with DaysPostExposure dropped
mod2a <- glmmTMB(data = DataDF,
                formula = LoadPercent~RescaledLength + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())

#Nested model with RescaledLength dropped
mod2b <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure + Strain + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())

#Nested model with Strain dropped
mod2c <- glmmTMB(data = DataDF,
                formula = LoadPercent~DaysPostExposure + RescaledLength + Block + (1|UniquePlateID),
                ziformula = ~1,
                family = beta_family())
AIC(mod2, mod2a, mod2b, mod2c) #None of these make any substantial improvements, so I'll still go with mod1 being the best


#Winning model
summary(mod1)

#Null model (just the intercept)
modnull <- glmmTMB(data = DataDF,
                   formula = LoadPercent~1 + Block + (1|UniquePlateID),
                   ziformula = ~1,
                   family = beta_family())

AIC(mod1, mod1a, mod1b, modnull)


#####Dauer vs. Population Prevalence Visualization####
PlateDF <- CombinedDF %>%
  group_by(Strain, Block, Timepoint, Plate, SDS) %>%
  summarize(PlatePrevalence = mean(IsInfected)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

MeanDF <- PlateDF %>%
  group_by(Strain, DaysPostExposure, SDS) %>%
  summarize(MoMPrevalence = mean(PlatePrevalence),
            SD = sd(PlatePrevalence),
            N = n()) %>%
  mutate(SE = SD / sqrt(N)) 

tiff("./Figures/Manuscript/DauerPopulationPrevalence.tiff", units="in", width = 6, height = 5, res=500)
ggplot(data = MeanDF, aes(x = DaysPostExposure, y = MoMPrevalence, group = SDS)) +
  facet_grid(Strain~.) +
  geom_point(data = PlateDF, 
             aes(x = DaysPostExposure,
                 y = PlatePrevalence,
                 shape = Strain,
                 color = SDS),
             size = 3,
             alpha = 0.4,
             position = position_jitter(width = 0.2)) +
  geom_line(color = "darkgrey",
            linewidth = 1) +
  geom_errorbar(aes(x = DaysPostExposure + 0.4,
                    ymin = MoMPrevalence - SE,
                    ymax = MoMPrevalence + SE),
                linewidth = 0.75,
                width = 0.2) +
  geom_point(aes(shape = Strain,
                 color = SDS),
             size = 4) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylab("Infection Prevalence") +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_shape_manual(values = c(16, 17), guide = "none") +
  scale_color_manual(values = c("grey21", "orchid3"), name = "Group", labels = c("Whole\npopulation", "Dauers")) +
  theme(axis.title.x = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.title.y = element_text(size = 20, face = "bold"),
        panel.border = element_rect(linewidth = 2, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        legend.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12))
dev.off()


####Dauer vs. Population prevalence statistical analysis####
DataDF <- CombinedDF %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  mutate(UniquePlateID = paste(Strain, Block, DaysPostExposure, Plate, sep="_")) %>%
  select(Strain, DaysPostExposure, Block, Plate, UniquePlateID, SDS, IsInfected)
DataDF$UniquePlateID <- as.factor(DataDF$UniquePlateID)
DataDF$Block <- as.factor(DataDF$Block)

#Starting with the unrestricted model
mod1 <- glmmTMB(data = DataDF,
                formula = IsInfected~SDS + DaysPostExposure + Strain + Block + (1|UniquePlateID),
                family = binomial)
testDispersion(mod1)
testZeroInflation(mod1)
plot(simulateResiduals(mod1))

#Comparing to the restricted model with SDS dropped
mod2 <- glmmTMB(data = DataDF,
                formula = IsInfected~DaysPostExposure + Strain + Block + (1|UniquePlateID),
                family = binomial)
anova(mod1, mod2)
summary(mod1)


####Dauer vs. Population Load Visualization####
PlateDF <- CombinedDF %>%
  group_by(Strain, Block, Timepoint, Plate, SDS) %>%
  summarize(PlateLoad = mean(LoadPercent)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

MeanDF <- PlateDF %>%
  group_by(Strain, Timepoint, SDS) %>%
  summarize(MoMLoad = mean(PlateLoad),
            SD = sd(PlateLoad),
            N = n()) %>%
  mutate(SE = SD / sqrt(N)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

tiff("./Figures/Manuscript/DauerPopulationLoad.tiff", units="in", width = 6, height = 5, res=500)
ggplot(data = MeanDF, aes(x = DaysPostExposure, y = MoMLoad, group = SDS)) +
  facet_grid(Strain~.) +
  geom_point(data = PlateDF, 
             aes(x = DaysPostExposure,
                 y = PlateLoad,
                 shape = Strain,
                 color = SDS),
             size = 3,
             alpha = 0.4,
             position = position_jitter(width = 0.2)) +
  geom_line(color = "darkgrey",
            linewidth = 1) +
  geom_errorbar(aes(x = DaysPostExposure + 0.4,
                    ymin = MoMLoad - SE,
                    ymax = MoMLoad + SE),
                linewidth = 0.5,
                width = 0.2) +
  geom_point(aes(shape = Strain,
                 color = SDS),
             size = 4) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylab("Infection Load") +
  scale_y_continuous(labels = scales::percent) +
  scale_shape_manual(values = c(16, 17), guide = "none") +
  scale_color_manual(values = c("grey21", "orchid3"), name = "Group", labels = c("Whole\npopulation", "Dauers")) +
  theme(axis.title.x = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.title.y = element_text(size = 20, face = "bold"),
        panel.border = element_rect(linewidth = 2, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        legend.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12))
dev.off()

####Dauer vs. Population Load statistical analysis####
DataDF <- CombinedDF %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  mutate(UniquePlateID = paste(Strain, Block, DaysPostExposure, Plate, sep="_")) %>%
  filter(LoadPercent > 0) %>%
  select(Strain, Block, DaysPostExposure, UniquePlateID, SDS, LoadPercent)
DataDF$UniquePlateID <- as.factor(DataDF$UniquePlateID)
DataDF$Block <- as.factor(DataDF$Block)

#Looking visually, it appears that there's a Strain*SDS interaction. I'm testing whether the
#SDS predictor should be there, but first I'll test whether the interaction is there.
mod1 <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain*SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~Strain + SDS + DaysPostExposure)
testDispersion(mod1)
testZeroInflation(mod1)
plot(simulateResiduals(mod1))

mod2 <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain + SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~Strain + SDS + DaysPostExposure)
anova(mod1, mod2) #There is a sig difference, and the interaction is supported.

##Because there's an interaction with Strain, I want to break up the dataset by Strain and then test 
##the SDS predictor in them separately

#N2
N2DataDF <- DataDF %>%
  filter(Strain == "N2")

#Testing for SDS predictor
mod3 <- glmmTMB(data = N2DataDF,
                formula = LoadPercent~SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~SDS + DaysPostExposure)
mod4 <- glmmTMB(data = N2DataDF,
                formula = LoadPercent~DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~DaysPostExposure)
anova(mod3, mod4)
summary(mod3)


#CB4856
CBDataDF <- DataDF %>%
  filter(Strain == "CB4856")

CBDataDF$SDS <- relevel(CBDataDF$SDS, ref = "Post")

#Testing for SDS predictor
mod5 <- glmmTMB(data = CBDataDF,
                formula = LoadPercent~SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~SDS + DaysPostExposure)
mod6 <- glmmTMB(data = CBDataDF,
                formula = LoadPercent~DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~SDS + DaysPostExposure)
anova(mod5, mod6)
summary(mod5)




#####Linear load Visualization#####
setwd("/Users/louis/Library/CloudStorage/GoogleDrive-louis.bubrig@gmail.com/My Drive/Work/Gibson Lab/Projects/Epidemic Timeline/Timeline Experiment/")

linearloaddata1 <- read.csv(file = "./Data/Partial Datasets/linearloaddata1.csv", header = TRUE)
linearloaddata1 <- linearloaddata1 %>%
  dplyr::select(Strain, SDS, Block, Timepoint, Plate, WormID, ImageName, DistAlongGut, Value)

linearloaddata2 <- read.csv(file = "./Data/Partial Datasets/linearloaddata2.csv", header = TRUE)
linearloaddata2 <- linearloaddata2 %>%
  dplyr::select(Strain, SDS, Block, Timepoint, Plate, WormID, ImageName, DistAlongGut, Value)

linearloaddata3 <- read.csv(file = "./Data/Partial Datasets/linearloaddata3.csv", header = TRUE)
linearloaddata3 <- linearloaddata3 %>%
  dplyr::select(Strain, SDS, Block, Timepoint, Plate, WormID, ImageName, DistAlongGut, Value)

linearloaddata <- rbind(linearloaddata1, linearloaddata2, linearloaddata3)
write.csv(linearloaddata, "./Data/linearloaddata.csv", row.names = FALSE)

linearloaddata <- read.csv(file = "./Data/linearloaddata.csv", header = TRUE)

PlateDF <- linearloaddata %>%
  drop_na() %>%
  mutate(BinaryValue = case_when(Value == 0~0,
                                 Value != 0~1)) %>%
  group_by(Strain, SDS, Block, Timepoint, Plate, WormID) %>%
  summarize(LinearLoad = mean(BinaryValue)) %>%
  group_by(Strain, SDS, Block, Timepoint, Plate) %>%
  summarize(PlateLinearLoad = mean(LinearLoad)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))
PlateDF$Strain <- as.factor(PlateDF$Strain)
PlateDF$Strain <- relevel(PlateDF$Strain, ref = "N2")
PlateDF$SDS <- as.factor(PlateDF$SDS)
PlateDF$SDS <- relevel(PlateDF$SDS, ref = "Pre")

MeanDF <- PlateDF %>%
  group_by(Strain, DaysPostExposure, SDS) %>%
  summarize(meanLinearLoad = mean(PlateLinearLoad),
            SD = sd(PlateLinearLoad),
            N = n()) %>%
  mutate(SE = SD / sqrt(N))

tiff("./Figures/Manuscript/DauerPopulationLinearLoad.tiff", units="in", width = 6, height = 5, res=500)
ggplot(data = MeanDF, aes(x = DaysPostExposure, y = meanLinearLoad, group = SDS)) +
  facet_grid(Strain~.) +
  geom_point(data = PlateDF, 
             aes(x = DaysPostExposure,
                 y = PlateLinearLoad,
                 shape = Strain,
                 color = SDS),
             size = 3,
             alpha = 0.4,
             position = position_jitter(width = 0.2)) +
  geom_line(color = "darkgrey",
            linewidth = 1) +
  geom_errorbar(aes(x = DaysPostExposure + 0.4,
                    ymin = meanLinearLoad - SE,
                    ymax = meanLinearLoad + SE),
                linewidth = 0.5,
                width = 0.2) +
  geom_point(aes(shape = Strain,
                 color = SDS),
             size = 4) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylab("% of Body Length Infected") +
  scale_y_continuous(labels = scales::percent) +
  scale_shape_manual(values = c(16, 17), guide = "none") +
  scale_color_manual(values = c("grey21", "orchid3"), name = "Group", labels = c("Whole\npopulation", "Dauers")) +
  theme(axis.title.x = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.title.y = element_text(size = 20, face = "bold"),
        panel.border = element_rect(linewidth = 2, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        legend.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12))
dev.off()


####Linear load statistical analysis####
linearloaddata <- read.csv(file = "./Data/linearloaddata.csv", header = TRUE)
DataDF <- linearloaddata %>%
  drop_na() %>%
  mutate(BinaryValue = case_when(Value == 0~0,
                                 Value != 0~1)) %>%
  group_by(Strain, SDS, Block, Timepoint, Plate, WormID) %>%
  summarize(LinearLoad = mean(BinaryValue)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                    Timepoint == 7~10,
                                    .default = Timepoint)) %>%
  mutate(UniquePlateID = paste(Strain, Block, DaysPostExposure, Plate, sep="_")) %>%
  filter(LinearLoad > 0) %>%
  filter(LinearLoad < 1) %>%
  ungroup() %>%
  select(Strain, Block, DaysPostExposure, UniquePlateID, SDS, LinearLoad) 
DataDF$Strain <- as.factor(DataDF$Strain)
DataDF$Strain <- relevel(DataDF$Strain, ref = "N2")
DataDF$Block <- as.factor(DataDF$Block)
DataDF$UniquePlateID <- as.factor(DataDF$UniquePlateID)
DataDF$SDS <- as.factor(DataDF$SDS)
DataDF$SDS <- relevel(DataDF$SDS, ref = "Pre")


#Test for Strain*SDS interaction first
mod1 <- glmmTMB(data = DataDF,
                formula = LinearLoad~Strain*SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~1)

testDispersion(mod1)
testZeroInflation(mod1)
plot(simulateResiduals(mod1))

mod2 <- glmmTMB(data = DataDF,
                formula = LinearLoad~Strain + SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~1)
anova(mod1, mod2) #There is a sig difference, and the interaction is supported.

##Because there's an interaction with Strain, I want to break up the dataset by Strain and then test 
##the SDS predictor in them separately

#N2
N2DataDF <- DataDF %>%
  filter(Strain == "N2")

#Testing for SDS predictor
mod3 <- glmmTMB(data = N2DataDF,
                formula = LinearLoad~SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~1)
mod4 <- glmmTMB(data = N2DataDF,
                formula = LinearLoad~DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~1)
anova(mod3, mod4)
summary(mod3)

#CB4856
CBDataDF <- DataDF %>%
  filter(Strain == "CB4856")

#Testing for SDS predictor
mod5 <- glmmTMB(data = CBDataDF,
                formula = LinearLoad~SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~1)
mod6 <- glmmTMB(data = CBDataDF,
                formula = LinearLoad~DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family(),
                dispformula = ~1)
anova(mod5, mod6)
summary(mod5)


####Life stage stacked bars visualization####
PlottingDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  mutate(LengthBin = cut(Length, breaks = c(0, 370, 480, 640, 850, 2000))) %>%
  mutate(LifeStage = case_when(LengthBin == "(0,370]" ~ "L1",
                              LengthBin == "(370,480]" ~ "L2",
                              LengthBin == "(480,640]" ~ "L3",
                              LengthBin == "(640,850]" ~ "L4",
                              LengthBin == "(850,2e+03]" ~ "Adult")) %>%
  group_by(Strain, Timepoint, LifeStage) %>%
  summarize(Count = n()) %>%
  group_by(Strain, Timepoint) %>%
  mutate(TotalIndividuals = sum(Count)) %>%
  mutate(Proportion = Count / TotalIndividuals) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))
PlottingDF$LifeStage <- as.factor(PlottingDF$LifeStage)
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L1")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L2")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L3")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "L4")
PlottingDF$LifeStage <- relevel(PlottingDF$LifeStage, ref = "Adult")

tiff("./Figures/Manuscript/LifeStageBreakdown.tiff", units="in", width = 8, height = 5, res=500)
ggplot(data = PlottingDF, aes(x = DaysPostExposure, y = Proportion)) +
  facet_grid(Strain~.) +
  geom_col(aes(fill = LifeStage)) +
  scale_fill_manual(values = c("grey2", "grey35", "lightgrey", "darkgreen", "lightgreen")) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylab("Life Stage Proportion") +
  scale_y_continuous(labels = scales::percent) +
  labs(fill = "Life Stage") +
  theme(axis.title.x = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.title.y = element_text(size = 20, face = "bold"),
        panel.border = element_rect(linewidth = 2, fill = NA),
        strip.text.y = element_text(size = 18, face = "bold"),
        legend.title = element_text(size = 16, face = "bold"),
        legend.text = element_text(size = 14))
dev.off()


#####Population SI Visualization####
population_count <- read.csv(file = "./Data/livecountdata.csv", header = TRUE)

CountDF <- population_count %>%
  filter(Treatment == "Exposed") %>%
  filter(SDS == "Pre") %>%
  mutate(DilutedCountPerUL = Count / AliquotVolUL) %>%
  mutate(UndilutedCountPerUL = DilutedCountPerUL * DilutionFactor) %>%
  mutate(SampleCount = UndilutedCountPerUL * TotalVolUL) %>%
  group_by(Strain, Block, Timepoint, Plate) %>%
  summarize(PlateCount = mean(SampleCount)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  group_by(Strain, DaysPostExposure) %>%
  summarize(meanCount = mean(PlateCount))

PrevalenceDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  group_by(Strain, Block, Timepoint, Plate) %>%
  summarize(PlatePrevalence = mean(IsInfected)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  group_by(Strain, DaysPostExposure) %>%
  summarize(meanPrevalence = mean(PlatePrevalence))

PlottingDF <- merge(CountDF, PrevalenceDF) %>%
  mutate(UninfectedCount = meanCount * (1 - meanPrevalence),
         InfectedCount = meanCount * meanPrevalence)
PlottingDF$Strain <- as.factor(PlottingDF$Strain)
PlottingDF$Strain <- relevel(PlottingDF$Strain, ref = "N2")


tiff("./Figures/Manuscript/PopulationSizeSI.tiff", units="in", width = 5, height = 5, res=500)
ggplot(data = PlottingDF, aes(x = DaysPostExposure)) +
  facet_grid(Strain~.) +
  geom_vline(xintercept = 1,
             linetype = "dashed",
             linewidth = 1,
             color = "darkgrey",
             alpha = 0.8) +
  geom_vline(xintercept = 2,
             linetype = "dashed",
             linewidth = 1,
             color = "darkgrey",
             alpha = 0.8) +
  geom_vline(xintercept = 3,
             linetype = "dashed",
             linewidth = 1,
             color = "darkgrey",
             alpha = 0.8) +
  geom_vline(xintercept = 4,
             linetype = "dashed",
             linewidth = 1,
             color = "darkgrey",
             alpha = 0.8) +
  geom_vline(xintercept = 5,
             linetype = "dashed",
             linewidth = 1,
             color = "darkgrey",
             alpha = 0.8) +
  geom_vline(xintercept = 8,
             linetype = "dashed",
             linewidth = 1,
             color = "darkgrey",
             alpha = 0.8) +
  geom_vline(xintercept = 10,
             linetype = "dashed",
             linewidth = 1,
             color = "darkgrey",
             alpha = 0.8) +
  geom_ribbon(aes(x = DaysPostExposure, 
                  ymin = 0,
                  ymax = InfectedCount),
              fill = "darkred") +
  geom_ribbon(aes(x = DaysPostExposure, 
                  ymin = InfectedCount,
                  ymax = InfectedCount + UninfectedCount),
              fill = "darkgrey") +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylab("Population Size") +
  theme(axis.title.x = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.title.y = element_text(size = 20, face = "bold"),
        panel.border = element_rect(linewidth = 2, fill = NA),
        strip.text.y = element_text(size = 18, face = "bold"))
dev.off()


####Length by Strain and Timepoint#####
DataDF <- CombinedDF %>%
  filter(SDS == "Pre")

PlottingDF <- DataDF %>%
mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                    Timepoint == 7~10,
                                    .default = Timepoint))

tiff("./Figures/Manuscript/LengthViolin.tiff", units="in", width = 5, height = 5, res=500)
ggplot(data = PlottingDF, aes(x = DaysPostExposure, y = Length)) +
  facet_grid(Strain~.) +
  geom_ribbon(aes(ymin = 0, ymax = 370),
              alpha = 0.3,
              fill = "lightgreen") +
  geom_ribbon(aes(ymin = 370, ymax = 480),
              alpha = 0.3,
              fill = "darkgreen") +
  geom_ribbon(aes(ymin = 480, ymax = 640),
              alpha = 0.3,
              fill = "lightgrey") +
  geom_ribbon(aes(ymin = 640, ymax = 850),
              alpha = 0.3,
              fill = "grey24") +
  geom_ribbon(aes(ymin = 850, ymax = 1200),
              alpha = 0.3,
              fill = "black") +
  geom_hline(yintercept = 370,
             color = "darkgrey",
             linetype = "dashed",
             linewidth = 1) +
  geom_hline(yintercept = 480,
             color = "darkgrey",
             linetype = "dashed",
             linewidth = 1) +
  geom_hline(yintercept = 640,
             color = "darkgrey",
             linetype = "dashed",
             linewidth = 1) +
  geom_hline(yintercept = 850,
             color = "darkgrey",
             linetype = "dashed",
             linewidth = 1) +
  geom_violin(aes(group = DaysPostExposure),
              linewidth = 0.5) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylim(0, 1200) +
  ylab("Host length (µm)") +
  theme(axis.title.x = element_text(size = 16, face = "bold"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16, face = "bold"),
        panel.border = element_rect(linewidth = 1, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"))
dev.off()



####Dauer Count Visualization####
dauer_count <- read.csv(file = "./Data/livecountdata.csv", header = TRUE)

PlateDF <- pop_count %>%
  filter(Treatment == "Exposed") %>%
  filter(SDS == "Post") %>%
  mutate(DilutedCountPerUL = Count / AliquotVolUL) %>%
  mutate(UndilutedCountPerUL = DilutedCountPerUL * DilutionFactor) %>%
  mutate(SampleCount = UndilutedCountPerUL * TotalVolUL) %>%
  group_by(Strain, Block, Timepoint, Plate) %>%
  summarize(PlateCount = mean(SampleCount)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))
PlateDF$Strain <- as.factor(PlateDF$Strain)
PlateDF$Strain <- relevel(PlateDF$Strain, ref = "N2")

MeanDF <- PlateDF %>%
  group_by(Strain, DaysPostExposure) %>%
  summarize(MoMCount = mean(PlateCount),
            SD = sd(PlateCount),
            N = n()) %>%
  mutate(SE = SD / sqrt(N))


tiff("./Figures/Manuscript/DauerCount.tiff", units="in", width = 5, height = 5, res=500)
ggplot(data = MeanDF, aes(x = DaysPostExposure, y = MoMCount)) +
  facet_grid(Strain~.) +
  geom_ribbon(aes(ymin = 0,
                  ymax = MoMCount),
              fill = "orchid3",
              alpha = 0.2) +
  geom_point(data = PlateDF,
             aes(x = DaysPostExposure,
                 y = PlateCount, 
                 shape = Strain),
             size = 3,
             position = position_jitter(width = 0.1),
             color = "darkgrey",
             alpha = 0.85) +
  geom_line(linewidth = 1,
            color = "darkgrey") +
  geom_errorbar(aes(x = DaysPostExposure + 0.3,
                    ymin = MoMCount - SE,
                    ymax = MoMCount + SE),
                linewidth = 0.75,
                width = 0.2) +
  geom_point(aes(shape = Strain),
             size = 4) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(1, 10, 1)) +
  ylab("Dauer Count") +
  theme(axis.title.x = element_text(size = 16, face = "bold"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16, face = "bold"),
        panel.border = element_rect(linewidth = 1, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        legend.position = "none")
dev.off()


####Paired dauer vs. population prevalence Visualization####
PlottingDF <- CombinedDF %>%
  group_by(Strain, Block, Timepoint, Plate, SDS) %>%
  summarize(PlatePrevalence = mean(IsInfected)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  mutate(BlockPlateID = paste(Block, Plate, sep = "_"))
PlottingDF$SDS <- as.factor(PlottingDF$SDS)


tiff("./Figures/Manuscript/DauerPopulationPrevalencePaired.tiff", units="in", width = 10, height = 5, res=500)
ggplot(data = PlottingDF, aes(x = SDS, y = PlatePrevalence, group = BlockPlateID)) +
  facet_grid(Strain~DaysPostExposure, switch = "x") +
  geom_line(color = "grey",
            linewidth = 1,
            position = position_dodge(width = 0.4)) +
  geom_point(aes(shape = Strain,
                 color = SDS),
             size = 5,
             alpha = 0.85,
             position = position_dodge(width = 0.4)) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  ylab("Infection Prevalence") +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_color_manual(values = c("grey21", "orchid3"), name = "Group", labels = c("Whole\npopulation", "Dauers")) +
  scale_shape_manual(values = c(16, 17), guide = "none") +
  theme(axis.title.x = element_text(size = 16, face = "bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16, face = "bold"),
        panel.border = element_rect(size = 1, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        strip.text.x = element_text(size = 12),
        legend.title = element_text(size = 12, face = "bold"),
        legend.text = element_text(size = 12))
dev.off()


####Paired dauer vs. population load Visualization#####
PlottingDF <- CombinedDF %>%
  group_by(Strain, Block, Timepoint, Plate, SDS) %>%
  summarize(PlateLoad = mean(LoadPercent)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  mutate(BlockPlateID = paste(Block, Plate, sep = "_"))
PlottingDF$SDS <- as.factor(PlottingDF$SDS)


tiff("./Figures/Manuscript/DauerPopulationLoadPaired.tiff", units="in", width = 10, height = 5, res=500)
ggplot(data = PlottingDF, aes(x = SDS, y = PlateLoad, group = BlockPlateID)) +
  facet_grid(Strain~DaysPostExposure, switch = "x") +
  geom_line(color = "grey",
            linewidth = 1,
            position = position_dodge(width = 0.4)) +
  geom_point(aes(shape = Strain,
                 color = SDS),
             size = 5,
             alpha = 0.85,
             position = position_dodge(width = 0.4)) +
  theme_minimal() +
  xlab("Day of Epidemic") +
  ylab("Infection Load") +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(values = c("grey21", "orchid3"), name = "Group", labels = c("Whole\npopulation", "Dauers")) +
  scale_shape_manual(values = c(16, 17), guide = "none") +
  theme(axis.title.x = element_text(size = 16, face = "bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 16, face = "bold"),
        panel.border = element_rect(size = 1, fill = NA),
        strip.text.y = element_text(size = 16, face = "bold"),
        strip.text.x = element_text(size = 12),
        legend.title = element_text(size = 12, face = "bold"),
        legend.text = element_text(size = 12))
dev.off()


















####TEST####
DataDF <- CombinedDF %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint)) %>%
  mutate(UniquePlateID = paste(Strain, Block, DaysPostExposure, Plate, sep="_")) %>%
  filter(LoadPercent > 0) %>%
  mutate(RescaledLength = Length / 100) %>%
  select(Strain, Block, DaysPostExposure, UniquePlateID, SDS, RescaledLength, LoadPercent)
DataDF$UniquePlateID <- as.factor(DataDF$UniquePlateID)
DataDF$Block <- as.factor(DataDF$Block)


##Checking for random effect structure first
#Saturated Model
mod1 <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain*SDS + DaysPostExposure + RescaledLength + Block + (1|UniquePlateID),
                family = beta_family())
mod1x <- glmmTMB(data = DataDF,
                 formula = LoadPercent~Strain*SDS + DaysPostExposure + RescaledLength + Block,
                 family = beta_family())
AIC(mod1, mod1x) #Model 1 is better, so the random effect is retained

##Tier 1
#Saturated model
mod1 <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain*SDS + DaysPostExposure + RescaledLength + Block + (1|UniquePlateID),
                family = beta_family())
#Nested model with interaction dropped
mod1a <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain + SDS + DaysPostExposure + RescaledLength + Block + (1|UniquePlateID),
                family = beta_family())
#Nested model with DaysPostExposure dropped
mod1b <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain*SDS + RescaledLength + Block + (1|UniquePlateID),
                family = beta_family())
#Nested model with RescaledLength dropped
mod1c <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain*SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family())


AIC(mod1, mod1a, mod1b, mod1c) #mod1a is slightly better, so we will drop the interaction and continue


##Tier 2
#Saturated model (same as mod1a above)
mod2 <- glmmTMB(data = DataDF,
                 formula = LoadPercent~Strain + SDS + DaysPostExposure + RescaledLength + Block + (1|UniquePlateID),
                 family = beta_family())
#Nested model with strain dropped
mod2a <- glmmTMB(data = DataDF,
                formula = LoadPercent~SDS + DaysPostExposure + RescaledLength + Block + (1|UniquePlateID),
                family = beta_family())
#Nested model with SDS dropped
mod2b <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain + DaysPostExposure + RescaledLength + Block + (1|UniquePlateID),
                family = beta_family())
#Nested model with DaysPostExposure dropped
mod2c <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain + SDS + RescaledLength + Block + (1|UniquePlateID),
                family = beta_family())
#Nested model with RescaledLength dropped
mod2d <- glmmTMB(data = DataDF,
                formula = LoadPercent~Strain + SDS + DaysPostExposure + Block + (1|UniquePlateID),
                family = beta_family())
AIC(mod2, mod2a, mod2b, mod2c, mod2d) #mod2 is the best model


##Tier 2 alternate (just trying out from mod1 because it was so close to mod1a)
#Saturated model (same as mod1 above)
mod2alt <- glmmTMB(data = DataDF,
                   formula = LoadPercent~Strain*SDS + DaysPostExposure + Block + (1|UniquePlateID),
                   family = beta_family())
#Nested model with DaysPostExposure dropped
mod2alt_a <- glmmTMB(data = DataDF,
                     formula = LoadPercent~Strain*SDS + Block + (1|UniquePlateID),
                     family = beta_family())
AIC(mod2alt, mod2alt_a)


#Winning model is mod1a (= mod2)
summary(mod1a)

#Null model
modnull <- glmmTMB(data = DataDF,
                   formula = LoadPercent~1,
                   family = beta_family())


AIC(mod1a, mod1, mod1b, mod1x, modnull)



####Mandy Prevalence and Load Figure####
PlateDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  group_by(Strain, Block, Timepoint, Plate) %>%
  summarize(PlatePrevalence = mean(IsInfected)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

MeanDF <- PlateDF %>%
  group_by(Strain, DaysPostExposure) %>%
  summarize(MoMPrevalence = mean(PlatePrevalence),
            SD = sd(PlatePrevalence),
            N = n()) %>%
  mutate(SE = SD / sqrt(N))

tiff("./Figures/Manuscript/MandyPrevalence.tiff", units="in", width = 6, height = 5, res=500)
ggplot(data = MeanDF, aes(x = DaysPostExposure, y = MoMPrevalence, group = Strain)) +
  geom_line(color = "darkgrey",
            linewidth = 2,
            position = position_dodge(width = 0.4)) +
  geom_errorbar(aes(x = DaysPostExposure,
                    ymin = MoMPrevalence - SE,
                    ymax = MoMPrevalence + SE),
                linewidth = 1.5,
                width = 0.2,
                position = position_dodge(width = 0.4)) +
  geom_point(aes(shape = Strain,
                 color = Strain),
             size = 7,
             position = position_dodge(width = 0.4)) +
  theme_classic() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(2, 10, 2), limits = c(0.75, 10.25)) +
  scale_color_manual(values = c("black", "orange2")) +
  ylab("Infection Prevalence") +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  theme(legend.position = "")+ #sets legend position currently no legend
  theme(axis.text.x = element_text(face = "bold", color = "grey20", size = 30, vjust=0.8),
        axis.text.y = element_text(face="bold",color = "grey20", size = 30),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.line=element_line(linewidth=2.5),
        axis.ticks=element_line(linewidth=2.5),
        axis.ticks.length=unit(0.1,"inch"))

dev.off()

#Load
PlateDF <- CombinedDF %>%
  filter(SDS == "Pre") %>%
  group_by(Strain, Block, Timepoint, Plate) %>%
  summarize(PlateLoad = mean(LoadPercent)) %>%
  mutate(DaysPostExposure = case_when(Timepoint == 6~8,
                                      Timepoint == 7~10,
                                      .default = Timepoint))

MeanDF <- PlateDF %>%
  group_by(Strain, DaysPostExposure) %>%
  summarize(MoMLoad = mean(PlateLoad),
            SD = sd(PlateLoad),
            N = n()) %>%
  mutate(SE = SD / sqrt(N))

tiff("./Figures/Manuscript/MandyLoad.tiff", units="in", width = 6, height = 5, res=500)
ggplot(data = MeanDF, aes(x = DaysPostExposure, y = MoMLoad, group = Strain)) +
  geom_line(color = "darkgrey",
            linewidth = 2,
            position = position_dodge(width = 0.4)) +
  geom_errorbar(aes(x = DaysPostExposure,
                    ymin = MoMLoad - SE,
                    ymax = MoMLoad + SE),
                linewidth = 1.5,
                width = 0.2,
                position = position_dodge(width = 0.4)) +
  geom_point(aes(shape = Strain,
                 color = Strain),
             size = 7,
             position = position_dodge(width = 0.4)) +
  theme_classic() +
  xlab("Day of Epidemic") +
  scale_x_continuous(breaks=seq(2, 10, 2), limits = c(0.75, 10.25)) +
  scale_color_manual(values = c("black", "orange2")) +
  ylab("Infection Load") +
  scale_y_continuous(labels = scales::percent) +
  theme(legend.position = "")+ #sets legend position currently no legend
  theme(axis.text.x = element_text(face = "bold", color = "grey20", size = 30, vjust=0.8),
        axis.text.y = element_text(face="bold",color = "grey20", size = 30),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.line=element_line(linewidth=2.5),
        axis.ticks=element_line(linewidth=2.5),
        axis.ticks.length=unit(0.1,"inch"))
dev.off()









   
  



